import librosa.util
import numpy as np
import random
"""
This code extracts speech clips of various kinds and concatenates them.
"""


"""
Dataset clipping operation
"""
import os
import soundfile as sf
def clip_gen_audio(dataset_dir, new_dir, clip_len=40000):
    """Truncate every audio file in ``dataset_dir`` and save it to ``new_dir``.

    Each entry of ``dataset_dir`` is read with ``soundfile``, cut down to its
    first ``clip_len`` items along the first axis (shorter files are kept
    whole), and written under the same file name and sample rate to
    ``new_dir``.

    Args:
        dataset_dir: Directory whose entries are the audio files to clip.
            Every entry is passed to ``sf.read``, so the directory should
            contain audio files only.
        new_dir: Output directory. It is created (including parents) when it
            does not exist yet.
        clip_len: Maximum length to keep from the start of each file.
            Defaults to 40000, the value that used to be hard-coded.

    Returns:
        None.
    """
    # Make sure the target directory exists before anything is written to it.
    os.makedirs(new_dir, exist_ok=True)

    for fname in os.listdir(dataset_dir):
        samples, sr = sf.read(os.path.join(dataset_dir, fname))
        # The previous `x.reshape(-1, 1)` call threw its result away, so it
        # never changed the data; the clipped array is written as read.
        sf.write(os.path.join(new_dir, fname), samples[:clip_len], sr)

    return


"""
Label rewriting operation
"""
def gen_new_label(old_label_path, new_label_path):
    """Rewrite a protocol file as a simplified ``<file name> <label>`` list.

    Every non-blank input line is split on whitespace. The second field with
    ``.flac`` appended becomes the file name, and the last field is the label.
    The label ``bonafide`` is rewritten to ``genuine`` and ``spoof`` to
    ``fake``; any other label is reported by printing ``error`` and is written
    through unchanged. One output line is produced per input line, in input
    order.

    Args:
        old_label_path: Path of the protocol text file to read.
        new_label_path: Path of the label file to write; an existing file is
            overwritten.

    Returns:
        None. Prints ``OK`` once the new file has been written.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    label_map = {"bonafide": "genuine", "spoof": "fake"}

    with open(old_label_path, 'r') as f:
        data = f.readlines()

    out_lines = []
    for item in data:
        # Split on any whitespace so tabs or repeated spaces between fields
        # do not shift the field positions.
        fields = item.split()
        if not fields:
            # Blank lines (e.g. a trailing empty line) carry no record.
            continue
        fname = fields[1] + ".flac"
        label = fields[-1]
        if label in label_map:
            label = label_map[label]
        else:
            print("error")
        out_lines.append(fname + " " + label + "\n")

    with open(new_label_path, "w") as f:
        f.writelines(out_lines)
    print("OK")
    return


if __name__ == "__main__":
    # Clip the audio files of one dataset split into a new directory.
    # Other input/output directories that were used with this step:
    #   input:  r"N:\csy\ASVNorm64600Dataset\train"
    #   input:  r"N:\csy\ASVNorm64600Dataset\dev"
    #   output: r"N:\csy\ASVNormClip64600Dataset\train"
    src_dir = r"N:\csy\ASVNormClip64600Dataset\dev"
    dst_dir = r"N:\csy\ASVNormClip40000Dataset\dev"
    clip_gen_audio(src_dir, dst_dir)

    # Label conversion step (currently disabled). Pick one protocol/label
    # pair and call gen_new_label on it:
    #
    # train split:
    #   old_label_path = r"M:\ASVspoof\ASVspoofdata\data_logical\ASVspoof2019_LA_protocols\ASVspoof2019.LA.cm.train.trn.txt"
    #   new_label_path = r"F:\ALLDATASET\ASVNorm64600Dataset\train_label.txt"
    #
    # dev split:
    #   old_label_path = r"M:\ASVspoof\ASVspoofdata\data_logical\ASVspoof2019_LA_protocols\ASVspoof2019.LA.cm.dev.trl.txt"
    #   new_label_path = r"F:\ALLDATASET\ASVNorm64600Dataset\dev_label.txt"
    #
    #   gen_new_label(old_label_path, new_label_path)